# =====================================================
# Replication Material: APPENDIX
# König et al. (2019)
# Comparative Politics and Causal Evaluation of Structural Reforms: The Case of the UK National Minimum Wage Introduction
# =====================================================

# start log file
sink("./log_appendix.txt")

# Install packages needed for analysis
p_needed <- c("devtools", "ggplot2", "dplyr", "tidyr", "xtable", "GGally", "Synth", "lattice", "plm") 
packages <- rownames(installed.packages())
p_to_install <- p_needed[!(p_needed %in% packages)]
if (length(p_to_install) > 0) {
  install.packages(p_to_install)
}

# Main analysis with gsynth is based on version 1.0.2
library(devtools)
install_version("gsynth", version = "1.0.2", repos = "http://cran.us.r-project.org")

# load packages
library(gsynth)
library(ggplot2)
library(dplyr)
library(tidyr)
library(xtable)
library(GGally)
library(plm)
library(Synth)
library(lattice)
lattice.options(default.theme = modifyList(standard.theme(color = FALSE), list(strip.background = list(col = "transparent"))))


# =================
# load data
# =================
rm(list=ls())
load("01_data/sample.RData")

# =================
# load plotting function
# =================
source("function_plot_gsynth.R")

# ======================
# I) SUPPLEMENTAL ANALYSES
# ======================

# Figure A1
# ==========

# run analysis (DID by setting number of factors to zero)
out.a1 <- gsynth(yurate ~ mw 
                  , data = sample, 
                  index = c("ccode","Year"), force = "two-way",
                  CV = FALSE, r = 0, se = TRUE, 
                  inference = "parametric", nboots = 1000,
                  parallel = TRUE, EM=TRUE,seed = 1111)

# Counterfactual plot
plot(out.a1, type = "counterfactual", xlab="", ylab = "Youth Unemployment Rate (%)", main = "", raw = "none") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998,linetype="dashed") +  
  theme(legend.position="bottom")  +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010)) + 
  ylim(0,25)

ggsave("02_figures_tables/FigureA1.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")

# Figure A2
# ==========

# run main analysis
out.1 <- gsynth(yurate ~ mw 
                , data = sample, 
                index = c("ccode","Year"), force = "two-way",
                CV = TRUE, r = c(0, 5), se = TRUE, 
                inference = "parametric", nboots = 1000,
                parallel = TRUE, EM=TRUE,seed = 1111)

# Plot factors sizes over time
plot(out.1, type = "factors", xlab="Year", ylab = "Youth Unemployment Rate", main = "") + 
  theme_classic(20, base_family = "serif") + 
  theme(legend.position = "bottom") + 
  geom_hline(yintercept = 0, linetype="dashed") 

ggsave("02_figures_tables/FigureA2.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")


# Figure A3
# ==========
plot(out.1, type = "loadings", xlab="Year", ylab = "Youth Unemployment Rate", main = "", nfactors = 5) + 
  theme_classic(base_family = "serif") 

ggsave("02_figures_tables/FigureA3.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445*1.5, units="in")

# Figure A4a
# ==========

# set number of factors to 4
out.4factors <- gsynth(yurate ~ mw 
                     , data = sample, 
                     index = c("ccode","Year"), force = "two-way",
                     CV = FALSE, r = 4, se = TRUE, 
                     inference = "parametric", nboots = 1000,
                     parallel = TRUE, EM=TRUE,seed = 1111)

# gap plot
plot(out.4factors, type="gap", xlab = "", ylab = "Gap Youth Unemployment (% points)",  main = "") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998, linetype="dashed") +  
  geom_hline(yintercept = 0,linetype="dashed") + 
  ylim(-55,55)  +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA4a.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")

# Figure A4b
# ==========

# set number of factors to 3
out.3factors <- gsynth(yurate ~ mw 
                     , data = sample, 
                     index = c("ccode","Year"), force = "two-way",
                     CV = FALSE, r = 3, se = TRUE, 
                     inference = "parametric", nboots = 1000,
                     parallel = TRUE, EM=TRUE,seed = 1111)

# Gap plot
plot(out.3factors, type="gap", xlab = "", ylab = "Gap Youth Unemployment (% points)",  main = "") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998, linetype="dashed") +  
  geom_hline(yintercept = 0,linetype="dashed") + 
  ylim(-55,55)  +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA4b.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")

# Figure A4c
# ==========

# Counterfactural plot with four estimated factors
plot(out.4factors, type = "counterfactual", xlab="", ylab = "Youth Unemployment Rate (%)", main = "", raw = "all") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998,linetype="dashed") +  
  theme(legend.position="bottom")  +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA4c.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")

# Figure A4d
# ==========

# Counterfactural plot with three estimated factors
plot(out.3factors, type = "counterfactual", xlab="", ylab = "Youth Unemployment Rate (%)", main = "", raw = "all") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998,linetype="dashed") +  
  theme(legend.position="bottom") +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA4d.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")

# ======================
# III) SYNTHETIC CONTROL
# ======================

# Pool of control countries
country.control <- c(305, 390, 375, 255, 325, 385, 380)

# predictor variables
pred <- c("ULC", "yrcurnt", "legelec", "checks", "maj", "taxrev", "eprc_v0", "TUD", "youngAdultsprop", "inflation", "FDI", "openc", "gdp_capita")


# Table A1
# ==========

# Summarize data
sum.dat <- sample[sample$ccode %in% country.control | sample$ccode == 200,]
sum.dat <- sum.dat[colnames(sum.dat) %in% pred | colnames(sum.dat) %in% c("Year")] 
sum.dat <- sum.dat[sum.dat$Year >= 1984 & sum.dat$Year <=2012,]

tab1 <- data.frame(cbind(  apply(sum.dat[,pred],2,min,na.rm=TRUE)
                         , apply(sum.dat[,pred],2,max,na.rm=TRUE)
                         , apply(sum.dat[,pred],2,mean,na.rm=TRUE)))
names(tab1) <- c('Minimum','Maximum','Mean')

print(xtable(tab1), file = "02_figures_tables/TableA1.tex")

# run analysis
# ==============

# i)  Generate Synth object 
data <- sample %>% filter(Year >= 1989)

sdata <- Synth::dataprep(foo = data[data$ccode %in% country.control | data$ccode == 200,],
                  predictors = pred,
                  dependent = "yurate",
                  unit.variable = "ccode",
                  time.variable = "Year", 
                  treatment.identifier = 200,
                  controls.identifier = country.control, 
                  time.predictors.prior = c(1989:1998),
                  time.optimize.ssr = c(1989:1998),
                  unit.names.variable = "country",
                  time.plot = 1989:2012)

# ii) Run the synthetic control analysis 
synth.out <- synth(data.prep.obj = sdata, method = "BFGS")

# iii) Calculate output gaps from the results
gaps <- sdata$Y1plot - (sdata$Y0plot %*% synth.out$solution.w)

synth.tables <- synth.tab(dataprep.res = sdata, synth.res=synth.out)



# Figure A5
# ==========

#### Country weights from Synthetic control
# Extract weights
a <- cbind(synth.tables$tab.w[,1],synth.tables$tab.w[,2])
row.names(a) <- synth.tables$tab.w[,2]
a <- a[order(as.numeric(a[,1]), decreasing=F),]

# plot
pdf("02_figures_tables/FigureA5.pdf")
dotchart(a[,1],pch=16)
dev.off()

# Table A2
# ==========

# Predictor means
print(xtable(synth.tables$tab.pred), file = "02_figures_tables/TableA2.tex")

# Table A3
# ==========

# Variable weights
print(xtable(synth.out$solution.v), file = "02_figures_tables/TableA3.tex")

# Figure A6a
# ==========

# Plot the Path of the youth unemployment for the UK and the Synthetic control
pdf("02_figures_tables/FigureA6a.pdf")
path.plot(synth.res = synth.out, 
          dataprep.res = sdata,
          Ylab="Youth Unemployment",
          Xlab="Year",
          #Main="UK",
          Legend=c("UK","Synthetic UK"),
          Legend.position="topright", abline(v=1998,lty="dashed")
)
dev.off()

# Figure A6b
# ==========

# Plot the gap of the youth unemployment for the UK and the Synthetic control
pdf("02_figures_tables/FigureA6b.pdf")
gaps.plot(synth.res = synth.out, dataprep.res = sdata,
          Ylab= "Gap in Youth Unemployment (percentage points)", Xlab="Year"
          #, Main="Gap plot UK"
          , abline(v=1998,lty="dashed"))
dev.off()

# ======================
# IV) ROBUSTNESS TESTS
# ======================

# Figure A7a
# ==========
# Controlling for FDI
out.robust.FDI <- gsynth(yurate ~ mw + FDI
                 , data = sample[sample$Year != 2013,], # subset due to data availability
                 index = c("ccode","Year"), force = "two-way",
                 CV = TRUE, r = c(0, 5), se = TRUE, 
                 inference = "parametric", nboots = 1000,
                 parallel = TRUE, EM=TRUE,seed = 1111)

# Counterfactural plot
plot(out.robust.FDI, type = "counterfactual", xlab="", ylab = "Youth Unemployment Rate (%)", main = "", raw = "all") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998,linetype="dashed") +  
  theme(legend.position="bottom") +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA7a.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")

# Figure A7b
# ==========

# Gap plot
plot(out.robust.FDI, type="gap", xlab = "", ylab = "Gap Youth Unemployment (% points)",  main = "") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998, linetype="dashed") +  
  geom_hline(yintercept = 0,linetype="dashed") + 
  ylim(-20,20)  +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA7b.pdf",
       width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")  

# Figure A8a
# ==========
# Controlling for inflation
out.robust.inflation <- gsynth(yurate ~ mw + inflation
                       , data = sample[sample$Year != 2013,], # subset due to data availability
                       index = c("ccode","Year"), force = "two-way",
                       CV = TRUE, r = c(0, 5), se = TRUE, 
                       inference = "parametric", nboots = 1000,
                       parallel = TRUE, EM=TRUE,seed = 1111)

# Counterfactural plot
plot(out.robust.inflation, type = "counterfactual", xlab="", ylab = "Youth Unemployment Rate (%)", main = "", raw = "all") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998,linetype="dashed") +  
  theme(legend.position="bottom") +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA8a.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")

# Figure A8b
# ==========
# Gap plot
plot(out.robust.inflation, type="gap", xlab = "", ylab = "Gap Youth Unemployment (% points)",  main = "") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998, linetype="dashed") +  
  geom_hline(yintercept = 0,linetype="dashed") + 
  ylim(-20,20)  +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA8b.pdf",
       width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")  

# Figure A9a
# ==========
# Controlling for employment protection index
out.robust.eprc_v1 <- gsynth(yurate ~ mw + eprc_v1          
                     , data = sample[sample$Year != 2013 & sample$Year != 1984,], # subset due to data availability
                     index = c("ccode","Year"), force = "two-way",
                     CV = TRUE, r = c(0, 5), se = TRUE, 
                     inference = "parametric", nboots = 1000,
                     parallel = TRUE, EM=TRUE,seed = 1111)

# Counterfactural plot
plot(out.robust.eprc_v1, type = "counterfactual", xlab="", ylab = "Youth Unemployment Rate (%)", main = "", raw = "all") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998,linetype="dashed") +  
  theme(legend.position="bottom") +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA9a.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")

# Figure A9b
# ==========
# Gap plot
plot(out.robust.eprc_v1, type="gap", xlab = "", ylab = "Gap Youth Unemployment (% points)",  main = "") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998, linetype="dashed") +  
  geom_hline(yintercept = 0,linetype="dashed") + 
  ylim(-20,20)  +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA9b.pdf",
       width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in") 

# Figure A10a
# ==========
# Controlling for unit labor costs
out.robust.ucl <- gsynth(yurate ~ mw + ULC
                   , data = sample[sample$Year != 2013,], # subset due to data availability
                   index = c("ccode","Year"), force = "two-way",
                   CV = TRUE, r = c(0, 5), se = TRUE, 
                   inference = "parametric", nboots = 1000,
                   parallel = TRUE, EM=TRUE,seed = 1111)

# Counterfactural plot
plot(out.robust.ucl, type = "counterfactual", xlab="", ylab = "Youth Unemployment Rate (%)", main = "", raw = "all") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998,linetype="dashed") +  
  theme(legend.position="bottom") +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA10a.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")


# Figure A10b
# ==========
# Gap plot
plot(out.robust.ucl, type="gap", xlab = "", ylab = "Gap Youth Unemployment (% points)",  main = "") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998, linetype="dashed") +  
  geom_hline(yintercept = 0,linetype="dashed") + 
  ylim(-20,20)  +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA10b.pdf",
       width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in") 

# Figure A11a
# ==========
# Controlling for tax revenue
out.robust.taxrev <- gsynth(yurate ~ mw + taxrev
                      , data = sample[sample$Year != 2013,], # subset due to data availability
                      index = c("ccode","Year"), force = "two-way",
                      CV = TRUE, r = c(0, 5), se = TRUE, 
                      inference = "parametric", nboots = 1000,
                      parallel = TRUE, EM=TRUE,seed = 1111)

# Counterfactural plot
plot(out.robust.taxrev, type = "counterfactual", xlab="", ylab = "Youth Unemployment Rate (%)", main = "", raw = "all") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998,linetype="dashed") +  
  theme(legend.position="bottom") +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA11a.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")

# Figure A11b
# ==========

# Gap plot
plot(out.robust.taxrev, type="gap", xlab = "", ylab = "Gap Youth Unemployment (% points)",  main = "") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998, linetype="dashed") +  
  geom_hline(yintercept = 0,linetype="dashed") + 
  ylim(-20,20)  +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA11b.pdf",
       width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")  

# Figure A12a
# ==========
# Controlling for trade union density
out.robust.TUD <- gsynth(yurate ~ mw + TUD
                   , data = sample[sample$Year != 2013,], # subset due to data availability
                   index = c("ccode","Year"), force = "two-way",
                   CV = TRUE, r = c(0, 5), se = TRUE, 
                   inference = "parametric", nboots = 1000,
                   parallel = TRUE, EM=TRUE,seed = 1111)

# Counterfactural plot
plot(out.robust.TUD, type = "counterfactual", xlab="", ylab = "Youth Unemployment Rate (%)", main = "", raw = "all") + 
  theme_classic(20, base_family = "serif") +
  geom_vline(xintercept = 1998,linetype="dashed") +  
  theme(legend.position="bottom") +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA12a.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")

# Figure A12b
# ==========

# Gap plot
plot(out.robust.TUD, type="gap", xlab = "", ylab = "Gap Youth Unemployment (% points)",  main = "") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998, linetype="dashed") +  
  geom_hline(yintercept = 0,linetype="dashed") + 
  ylim(-20,20)  +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA12b.pdf",
       width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")  

# Figure A13a
# ==========
# Controlling for Dept as share of GDP
out.robust.debt.gdp <- gsynth(yurate ~ mw + Debt.GDP 
                        , data = filter(sample, Year>=1991 & ccode!=305), # subset due to data availability
                        index = c("ccode","Year"), force = "two-way",
                        CV = TRUE, r = c(0, 5), se = TRUE, 
                        inference = "parametric", nboots = 1000,
                        parallel = TRUE, EM=TRUE,seed = 1111)

# Counterfactural plot
plot(out.robust.debt.gdp, type = "counterfactual", xlab="", ylab = "Youth Unemployment Rate (%)", main = "", raw = "all") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998,linetype="dashed") +  
  theme(legend.position="bottom") +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA13a.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")

# Figure A13b
# ==========

# Gap plot
plot(out.robust.debt.gdp, type="gap", xlab = "", ylab = "Gap Youth Unemployment (% points)",  main = "") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998, linetype="dashed") +  
  geom_hline(yintercept = 0,linetype="dashed") + 
  ylim(-20,20)  +
  scale_x_continuous(breaks=c(1985,1990,1995,2000,2005,2010))

ggsave("02_figures_tables/FigureA13b.pdf",
       width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in") 

# Figure A14a
# ==========
# Setting treatment year to 1997
sample_t2 <- mutate(sample, mw = ifelse(ccode == 200 & Year ==1998,1, ifelse(ccode == 200 & Year ==1997,1, mw)))
out.minus2 <- gsynth(yurate ~ mw 
              , data = sample_t2, 
              index = c("ccode","Year"), force = "two-way",
              CV = TRUE, r = c(0, 5), se = TRUE, 
              inference = "parametric", nboots = 1000,
              parallel = TRUE, EM=TRUE,seed = 1111)

# Gap plot
plot(out.minus2, type="gap", xlab = "", ylab = "Gap Youth Unemployment (% points)",  main = "") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1996,linetype="dashed") +  
  geom_hline(yintercept = 0,linetype="dashed")+ 
  ylim(-16,20)

ggsave("02_figures_tables/FigureA14a.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")

# Figure A14b
# ==========
# Setting treatment year to 1998
sample_t1 <-  mutate(sample, mw = ifelse(ccode == 200 & Year ==1998,1, mw))

out.minus1 <- gsynth(yurate ~ mw 
              , data = sample_t1, 
              index = c("ccode","Year"), force = "two-way",
              CV = TRUE, r = c(0, 5), se = TRUE, 
              inference = "parametric", nboots = 1000,
              parallel = TRUE, EM=TRUE,seed = 1111)

# Gap plot
plot(out.minus1, type="gap", xlab = "", ylab = "Gap Youth Unemployment (% points)",  main = "") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1997,linetype="dashed") + 
  geom_hline(yintercept = 0,linetype="dashed")+ 
  ylim(-16,20)

ggsave("02_figures_tables/FigureA14b.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")


# Figure A15
# ==========
# Unemployment as dependent variable
# Note: Subset sample due to data availibility and ensure it is balanced

sampleb <- sample %>% dplyr::select(unempl, mw, ccode, Year) %>% na.omit %>% data.frame 
sampleb <- make.pbalanced(sampleb, balance.type = "shared.times", index =c("ccode",  "Year"))

out.unp <- gsynth(unempl ~ mw 
             , data = sampleb, 
             index = c("ccode","Year"), force = "two-way",
             CV = TRUE, r = c(0, 5), se = TRUE, 
             inference = "parametric", nboots = 1000,
             parallel = TRUE, EM=TRUE,seed = 1111)

# Gap plot
plot(out.unp, type="gap", xlab = "", ylab = "Gap Unemployment (% points)",  main = "") + 
  theme_classic(20, base_family = "serif") + 
  geom_vline(xintercept = 1998,linetype="dashed") + 
  geom_hline(yintercept = 0,linetype="dashed") 

ggsave("02_figures_tables/FigureA15.pdf"
       , width=(6.3228344444445)*1.5, 
       height=6.3228344444445, units="in")


# ======================
# ======================
# close log file
sink()